# Code
# Attach every package this script relies on. `library()` stops right away
# with an explicit "there is no package called ..." error when one is missing,
# so no `stopifnot(require(...))` wrapper is needed.
library(patchwork)
library(httr)
library(glue)
library(ineq)
library(here)
library(slider)
library(tidyverse)
library(gtools)
# Disabled: uncomment to make theme_minimal() the default ggplot2 theme
# (the previous theme is returned so it can be restored later).
# old_theme <- theme_set(theme_minimal())
# Local cache for the INSEE `nat2021_csv.zip` archive: the file is downloaded
# once and then reused from the project's data directory.
path_data <- 'DATA'
fname <- 'nat2021_csv.zip'
fpath <- here(path_data, fname)

if (!file.exists(fpath)) {
  # download.file() does not create missing directories, so make sure the
  # destination folder exists before fetching.
  dir.create(dirname(fpath), showWarnings = FALSE, recursive = TRUE)
  url <- "https://www.insee.fr/fr/statistiques/fichier/2540004/nat2021_csv.zip"
  # Binary mode keeps the zip archive intact on Windows.
  download.file(url, fpath, mode = "wb")
}
# Read the cached INSEE archive (semicolon-separated, hence read_csv2()).
df_fr <- readr::read_csv2(fpath)

# Attach `babynames`, installing it on the fly when it is not available.
# require() is used on purpose here: it returns FALSE instead of failing,
# which is what drives the install-then-retry logic.
if (!require("babynames")) {
  install.packages("babynames")
  # Named-argument form: the name is the error message, the value is the
  # condition that must hold.
  stopifnot(
    "Couldn't install and load package 'babynames'" = require("babynames")
  )
}

# Lookup from the column names used for the US data (`year`, `sex`, `name`,
# `n`) to the column names listed for the French table.
# NOTE(review): nothing in this section applies `lkp` to `df_fr`; confirm that
# the French columns are renamed (and given compatible types) before `df_fr`
# is combined with `babynames`.
lkp <- list(
  year = "annais",
  sex = "sexe",
  name = "preusuel",
  n = "nombre"
)

# Yearly births table published by INED, cached next to the other data files.
births_fr_path <- here(path_data, 't35.fr.xls')
births_fr_url <- 'https://www.ined.fr/fichier/s_rubrique/168/t35.fr.xls'
if (!file.exists(births_fr_path)) {
  # download.file() does not create missing directories.
  dir.create(dirname(births_fr_path), showWarnings = FALSE, recursive = TRUE)
  # `.xls` is a binary format: without mode = "wb" the file can be corrupted
  # on Windows.
  download.file(births_fr_url, births_fr_path, mode = "wb")
}

# Skip the first three lines of the sheet, then drop the first remaining row
# (presumably a sub-header -- TODO confirm against the spreadsheet).
births_fr <- readxl::read_excel(births_fr_path, skip = 3)
births_fr <- births_fr[-1, ]
names(births_fr)[1] <- "year"

# Coerce the year to integer and discard every row that contains a missing
# value, including rows whose year could not be converted.
births_fr <- births_fr |>
  mutate(year = as.integer(year)) |>
  drop_na()

# Tag the US records with their country and store `sex` as a factor.
babynames <- babynames |>
  mutate(country = 'us') |>
  mutate(sex = as_factor(sex))
# `births` is the yearly births dataset shipped with the babynames package.
births_us <- births

# Stack the US and French name tables into a single data frame.
df <- bind_rows(babynames, df_fr)

# Keep years after 1947, drop rows with any missing value, and remove the
# '_PRENOMS_RARES' placeholder entry.
df <- df |>
  filter(year > 1947) |>
  drop_na() |>
  filter(name != '_PRENOMS_RARES')

# Within each (year, sex, country) cell, order names by decreasing count and
# attach:
#   rnk   - rank of the name (1 = largest n),
#   rrnk  - rank divided by the number of names in the cell,
#   cprop - cumulative sum of `prop` along that ordering.
df <- df |>
  group_by(year, sex, country) |>
  arrange(desc(n), .by_group = TRUE) |>
  mutate(
    rnk = row_number(),
    rrnk = rnk / n(),
    cprop = cumsum(prop)
  ) |>
  ungroup()

# One minus the relative rank at the last position where the cumulative
# proportion is still <= 0.5.
#
# cprop: non-decreasing cumulative proportions.
# rrnk:  relative ranks aligned with `cprop`.
# Returns a single number, or NA when even the first cumulative proportion
# exceeds 0.5 (findInterval() then returns 0 and indexing would yield an
# empty vector).
min_maj <- function(cprop, rrnk) {
  idx <- findInterval(.5, cprop)
  if (idx == 0L) {
    return(NA_real_)
  }
  1 - rrnk[idx]
}

# Cumulative proportion at the last position whose relative rank is <= 0.1.
#
# cprop: cumulative proportions aligned with `rrnk`.
# rrnk:  non-decreasing relative ranks.
# Returns a single number, or NA when every relative rank exceeds 0.1.
last_dec <- function(cprop, rrnk) {
  idx <- findInterval(.1, rrnk)
  if (idx == 0L) {
    return(NA_real_)
  }
  cprop[idx]
}

# Inequality indices from the ineq package, keyed by the suffix they get in
# the summary table.
ineq_idx_fns <- list(
  gini = Gini,
  atkinson = Atkinson,
  ent = entropy,
  theil = Theil
)
# One row per (year, sex, country, index): every function in `ineq_idx_fns`
# is applied to the counts `n` (columns n_gini, n_atkinson, ...), alongside
# the two rank-based summaries n_alker and n_last_dec. The result is then
# reshaped to long format for plotting.
ineq_idxes <- df |>
  summarize(
    across(n, .fns = ineq_idx_fns),
    n_alker = min_maj(cprop, rrnk),
    n_last_dec = last_dec(cprop, rrnk),
    .by = c(year, sex, country)
  ) |>
  pivot_longer(
    cols = starts_with("n"),
    names_to = "index_name",
    values_to = "index"
  )

# Time series of each index: one row of panels per index (free y scale),
# one column per country, one line per sex.
ineq_idxes |>
  ggplot() +
  aes(x = year, y = index, color = sex) +
  geom_line() +
  facet_grid(rows = vars(index_name), cols = vars(country), scales = "free_y")

# Best (smallest) rank each name ever reaches within its country and sex.
df <- df |>
  group_by(country, sex, name) |>
  mutate(best_rnk = min(rnk)) |>
  ungroup()

# Scatter of 1 - cprop against 1 - rrnk, one panel per country, handed to
# plotly with `year` as the animation frame. The filter keeps a thinned
# subset of points: relative rank below 0.9 and round(10000 * rrnk) ending
# in 1.
(
  df |>
    filter(rrnk < .9, round(10000 * rrnk) %% 10 == 1) |>
    ggplot() +
    aes(x = 1 - rrnk, y = 1 - cprop, color = sex, frame = year) +
    geom_point(size = .2) +
    coord_fixed() +
    facet_wrap(~ country)
) |>
  plotly::ggplotly()